//produce db relating to migrants recruited by agencies  

* Session setup: empty the data in memory, then apply session settings
clear
set more off
set matsize 100
set memory 500m
#delimit ;

* From here on every command, including comments, ends with a semicolon;

* Set directory. All relative paths used below are resolved from this folder;

cd "~/Desktop/migrec_replication/do/";


** append self datasets;

* Pass 1. Convert each raw CSV, numbered 5 through 15, into a .dta file;
forvalues i = 5/15 {;

    insheet using ../raw_secure/self-`i'.csv, comma clear;

    * Try to store these four columns as text in every file. capture suppresses
      any error, for example when a column is absent from a given file;
    foreach v in cn birth_day salary currency {;
        capture tostring `v', replace;
    };

    save ../dta_secure/self-`i'.dta, replace;

};

* Pass 2. Start from file 5 and stack files 6 through 15 underneath it.
  force lets append continue when a variable is string in one file and numeric
  in another. NOTE(review): in that case values from the mismatched file may be
  lost, confirm this is acceptable;
use ../dta_secure/self-5.dta, clear;

forvalues i = 6/15 {;
    append using ../dta_secure/self-`i'.dta, force;
};

save ../dta_secure/self.dta, replace;

** STEP 1: get rid of troublesome passports;

* Drop rows whose passport number is empty or whose first character is a
  period, a hyphen, a comma, 0 or 4;
drop if ppno == "" | inlist(substr(ppno, 1, 1), ".", "-", ",", "0", "4");


** STEP 2: figure out dates;

* The two-digit year is read starting seven characters from the end of dep_date
  and is prefixed with 20 to give a four-character text year;
gen departure_year = "20" + substr(dep_date, -7, 2);

* The month is the first two characters of dep_date. When the second of those
  is a slash the month has one digit, so only the first character (rep_m) is kept;
gen departure_month = substr(dep_date, 1, 2);
gen rep_m = substr(dep_date, 1, 1);
replace departure_month = rep_m if substr(departure_month, -1, 1) == "/";

save ../dta_secure/self.dta, replace;

** STEP 3: figure out countries;

* zeros holds the left padding that brings country codes 1 to 99 up to three
  characters. It stays empty for every other value of cat;
gen zeros = "";

* cat is the numeric version of cn. force turns non-numeric cn values into missing;
destring cn, force gen(cat);

* cn_code is cat written back out as text. It is the merge key used below;
tostring cat, force gen(cn_code);

replace zeros = "00" if cat > 0 & cat <= 9;
* Fixed: this line used to name the variable zero, which only resolved to zeros
  through variable-name abbreviation and fails when varabbrev is switched off;
replace zeros = "0" if cat > 9 & cat < 100;

replace cn_code = zeros + cn_code;

* Attach country attributes. country_code is the merge indicator
  (1 master only, 2 using only, 3 matched). force lets the merge run even when
  a shared variable is string on one side and numeric on the other;
merge m:1 cn_code using ../dta_secure/country.dta, gen(country_code) force;

* Discard countries that appear only in country.dta;
keep if country_code != 2;


** STEP 4: figure out villages;

* Rename the key so it matches the name used for the merge with villages.dta;
rename town_code towncode;

* townmerge is the merge indicator (1 master only, 2 using only, 3 matched);
merge m:1 towncode using ../dta_secure/villages.dta, generate(townmerge);

* Discard villages that appear only in villages.dta;
drop if townmerge == 2;


** STEP 5: figure out sectors;

* Rename the key so it matches the name used for the merge with job_codes;
rename jobcate job_code;

* jobcodemerge is the merge indicator (1 master only, 2 using only, 3 matched);
merge m:1 job_code using ../dta_secure/job_codes, generate(jobcodemerge);

* Discard job codes that appear only in the lookup file;
drop if jobcodemerge == 2;



** STEP 6: Convert currencies;

* note, does not account for inflation;

* Rename the currency key to cuunit, the name used for the merge with currencies.dta;
rename cn_currancy cuunit;

* currencymerge is the merge indicator (1 master only, 2 using only, 3 matched);
merge m:1 cuunit using ../dta_secure/currencies.dta, generate(currencymerge);

* Discard currencies that appear only in currencies.dta;
drop if currencymerge == 2;

* salary was stored as text before appending, so turn it back into a number.
  NOTE(review): without force, destring leaves salary as text if any value is
  non-numeric, and the next command would then fail. Confirm the data is clean;
destring salary, replace;

* NOTE(review): cn_currency_rate is not created in this file, presumably it
  comes from one of the merged lookup files. 132.9 looks like a fixed exchange
  rate used to express the result in USD. Confirm both;
local divisor 132.9;
gen usd_salary = (salary * cn_currency_rate) / `divisor';



** STEP 8: Figure out age (the step numbering in this file skips 7);

* The two-digit birth year is read starting seven characters from the end of
  birth_day. force turns anything non-numeric into missing;
gen birth_yr = substr(birth_day, -7, 2);
destring birth_yr, replace force;

* Every two-digit year is placed in the 1900s.
  NOTE(review): a birth year of 2000 or later would be mapped to the 1900s too;
replace birth_yr = 1900 + birth_yr;
label variable birth_yr "year of birth";

* Age is measured against the fixed reference year 2016;
local ref_year 2016;
gen current_age = `ref_year' - birth_yr;
label variable current_age "current age";

save ../dta_secure/self.dta, replace;


** ADD ANF JOB SECTOR CLASSIFICATIONS;

* jobcode_class_anf is the merge indicator. Unlike the earlier steps, rows found
  only in the using file are not dropped here;
merge m:1 jobcode_jobsum using ../res/jobcode_classification_anf.dta, generate(jobcode_class_anf);

** ADD DISTRICT DATA;

* district_merge is the merge indicator. Rows found only in the using file are
  not dropped here either;
merge m:1 vil_dis_code using ../dta_secure/districts.dta, generate(district_merge);

** MISC;

* clean up years. Keep departures from 2005 through 2015. departure_year is
  text, so the comparison is alphabetical, and rows with an empty
  departure_year are removed as well;
drop if departure_year < "2005" | departure_year > "2015";

save ../dta_secure/self.dta, replace;

 
 